Setup

library(conos)
library(qs)
library(dplyr)
library(magrittr)
library(pagoda2)
library(scHelper) # github.com/rrydbirk/scHelper
library(cacoa)
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(ggplot2)
library(rstatix)
library(ggpubr)
library(ComplexHeatmap)
library(destiny)
library(Seurat)
library(harmony)
library(slingshot)
library(circlize)
library(cowplot)
library(sccore)
library(corrplot)

meta <- qread("meta.qs")

tt <- Sys.time()

Figure 1

Load data

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

Figure 1G

con.major$plotGraph(groups = anno.major, 
                    plot.na = FALSE, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    show.labels = TRUE, 
                    font.size = 5) + 
  labs(x = "UMAP1", y = "UMAP2") + 
  theme(line = element_blank()) + 
  scale_color_manual(values = RColorBrewer::brewer.pal(9, "Set1"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 1H

cm.merged <- con.major$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.major), ]

c("ADIPOQ", "PLIN1", # Adipocytes
  "PDGFRA", "DCN", # ASPCs
  "MECOM", "ADGRL4", # ECs
  "PKHD1L1", "PROX1", # LECs
  "MYOCD", "MYH11", # SMCs
  "STEAP4", "GIPR", # Pericytes
  "IL7R", "CD3E", # Lymphoid
  "KIT", "CPA3", # Mast
  "MRC1", "F13A1") %>% # Myeloid
  sccore::dotPlot(., 
                  cm.merged, 
                  factor(anno.major[rownames(cm.merged)], 
                         levels = c("Adipocytes", 
                                    "ASPCs", 
                                    "ECs", 
                                    "Lymphatic ECs", 
                                    "SMCs", 
                                    "Pericytes", 
                                    "Lymphoid immune cells", 
                                    "Mast cells", 
                                    "Myeloid immune cells")), 
                  gene.order = ., 
                  cols = c("white","grey20"))

Figure 1I

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

p <- cao$plotCellGroupSizes(show.significance = TRUE, 
                            filter.empty.cell.types = FALSE)
plot.dat <- p$data %>% 
  mutate(sex = rep(meta$sex, 9)) %>% 
  mutate(sample = stringi::stri_dup(LETTERS[seq(14)], 3) %>% 
           paste(collapse = "") %>% 
           strsplit("") %>% 
           unlist() %>% 
           rep(9)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% 
           gsub("Vis", "vis", .),
         facet = ifelse(variable %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"),
         variable = factor(variable, levels = c("Adipocytes", "ASPCs", "ECs", "Myeloid immune cells", "Lymphatic ECs", "SMCs", "Pericytes", "Lymphoid immune cells", "Mast cells")) %>% 
           renameAnnotation("Myeloid immune cells", "   Myeloid immune cells")) %>% 
  mutate(sex_visit_anno = paste0(sex_visit, "_", variable))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>%
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T),
         facet = ifelse(ct %in% c("SMCs", "Mast cells", "Lymphatic ECs", "Pericytes", "Lymphoid immune cells"), "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(is.na(x[10])) "" else if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p), !var %in% c("Adipocytes", "Lymphoid immune cells")) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(facet = ifelse(var %in% c("SMCs","Mast cells","Lymphatic ECs","Pericytes","Lymphoid immune cells"), "low", "high"))

# Plot
p.high <- plot.dat %>% 
  filter(facet == "high") %>% 
  ggplot(aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 65, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

x.cor <- 4

p.low <- plot.dat %>% 
  filter(facet == "low") %>%
  ggplot(aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 15, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = FALSE) # Wilcoxon

pp <- plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,1.5))

pp

Figure 2

Load data

con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")

Figure 2A

con$plotGraph(groups = anno.vascular, 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "UMAP", 
              shuffle.colors = TRUE, 
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "UMAP1", y = "UMAP2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(9, "Purples")[-c(1,2)][c(1,4,7,2,6,3,5)])
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 2B

cm.merged <- con$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.vascular), ]

c("PCSK5", "NEBL", "FN1", "CADM2", "BTNL9", "CEACAM1", "IL1R1", "VCAN", "ACKR1", "PKHD1L1", "MMRN1", "RELN", "MYH11", "ACTA2", "MYOCD", "COL25A1", "STEAP4", "NCKAP5") %>%  
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.vascular %>% factor(levels = c("arEC", "cEC", "cvEC", "vEC", "lEC", "SMCs", "Pericytes")), 
                  gene.order = ., 
                  cols = c("white","purple3"))

Figure 2D

Here, we’re plotting proportions of total cells, so we load the Conos object and the annotation for all cells first.

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3",
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.vascular)] %>% 
  {factor(c(., anno.vascular))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)
plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.vascular)) %>% 
  mutate(sex = rep(meta$sex, anno.vascular %>% levels() %>% length())) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         facet = ifelse(variable %in% c("cvEC", "lEC"), "low", "high"),
         variable = factor(variable, levels = c("arEC", "cEC", "vEC", "SMCs", "Pericytes", "cvEC", "lEC")) %>% 
           renameAnnotation("lEC", "         lEC"))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T),
         facet = ifelse(ct %in% c("cvEC", "         lEC"), "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(y.position = c(y.position[1]+1, y.position[2], y.position[3]+1, y.position[4], y.position[5]+1, y.position[6:7]),  # Manually adjust overlapping lines
         facet = ifelse(var %in% c("cvEC", "         lEC"), "low", "high"))

# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 22.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 4.5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,0.8))

Figure 3

Load data

con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")

anno.myeloid <- anno.immune %>% 
  .[!. %in% c("NKT","NK","Tem","Th1","Th","Treg","B-cells")] %>% 
  factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC"))

Figure 3A

con$plotGraph(groups = anno.immune[!anno.immune == "B-cells"], 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "largeVis", 
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "largeVis1", y = "largeVis2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = c("#174D86", "#9EC3B5", "#356790", "#ADD0BA", "#628EA0", "#80A9AA", "#719CA5"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 3B

cm.merged <- con$getJointCountMatrix(raw = FALSE) %>% 
  .[rownames(.) %in% names(anno.myeloid), ]

c("LYVE1", "SELENOP", # ATM
  "FCN1","VCAN", # mono/mac
  "CYP27A1", "APOE", # Early LAM
  "LPL", "TREM2", # LAM
  "CLEC9A", "CADM1", # cDC1
  "CD1C", "CLEC10A", # cDC2
  "CCR7", "LAMP3") %>% # mDC
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.myeloid, 
                  gene.order = ., 
                  cols = c("white","#377EB8"))

Figure 3C

Here, we’re plotting proportions of total cells, so we load the Conos object and the annotation for all cells first.

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.myeloid)] %>% 
  {factor(c(., anno.myeloid))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)
plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.myeloid)) %>% 
  mutate(sex = rep(meta$sex, anno.myeloid %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         facet = ifelse(variable != "ATM", "low", "high"),
         variable = factor(variable, levels = c("ATM", "mono/mac", "Early LAM", "LAM", "cDC1", "cDC2", "mDC")) %>% 
           renameAnnotation("ATM", "         ATM"))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = TRUE),
         facet = ifelse(ct != "         ATM", "low", "high"))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = TRUE) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position)) %>% 
  mutate(facet = ifelse(var != "         ATM", "low", "high")) 

# Plot
p.high <- ggplot(plot.dat %>% filter(facet == "high"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat %>% filter(facet == "high"), aes(ct, y = 50, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "high"), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

x.cor = 1

p.low <- ggplot(plot.dat %>% filter(facet == "low"), aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = FALSE, data = sex.stat %>% filter(facet == "low"), aes(ct, y = 10, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test %>% filter(facet == "low") %>% mutate(x = x-x.cor, xmin = xmin-x.cor, xmax = xmax-x.cor), label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = FALSE) # Wilcoxon

plot_grid(plotlist = list(p.high, p.low), rel_widths = c(1,4.5))

Figure 3D

Load data

## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")

TREM2

gene_of_interest <- "TREM2"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

SPP1

gene_of_interest <- "SPP1"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

MMP9

gene_of_interest <- "MMP9"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

Figure 3E

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.myeloid)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.myeloid, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType(min.cell.frac = 0.1, 
                          min.cell.count = 5)
## Preparing matrices for DE
## Estimating DE per cell type
## DEs not calculated for 2 cell group(s): cDC1, mDC
de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.myeloid %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names()) %>% 
  .[grepl("ATM|Early LAM|LAM|mono/mac", .)]

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
genes <- de[c("ATM","Early LAM","LAM","mono/mac")] %>%
  {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
  lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
  lapply(pull, Gene) %>% 
  {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
  Reduce(c, .) %>% 
  .[match(unique(.), .)]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1.5] <- -1.5

Please note, the order of the clusters is not always reproducible.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=T,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=T)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_km = 5,
                              row_km_repeats = 500,
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")))

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))
## 
## 

Figures 3F,G

Preparations

cts <- c("Early LAM", "LAM", "mono/mac", "ATM")
cm.merged <- con$getJointCountMatrix(raw = TRUE)

# Get markers
markers <- con$getDifferentialGenes(groups = anno.myeloid %>% .[. %in% cts], 
                                    z.threshold = 1, 
                                    upregulated.only = TRUE, 
                                    append.specificity.metrics = TRUE, 
                                    append.auc = TRUE)
## Estimating marker genes per sample
## Aggregating marker genes
## Estimating specificity metrics
## All done!
genes <- markers$LAM %>% 
  filter(AUC > 0.8) %>% 
  pull(Gene)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.myeloid[anno.myeloid %in% cts] %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = TRUE) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure 3F

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% c("LPL")] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = "LPL", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 500, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

Figure 3G

plot.dat <- cm.pseudo %>%
  .[, colnames(.) %in% genes] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ")) %>% 
  group_by(visit, sample, anno, donor, sex, sex_visit) %>%
  summarize(value = mean(value)) %>% 
  filter(visit != "visit 2") %>% 
  mutate(visit = factor(visit),
         donor_anno = paste(strsplit(donor, "_") %>% sapply(\(x) paste(x[1], x[2], sep = "_")), anno, sep = "_")) %>% 
  arrange(visit, sample) %$% 
  split(., visit) %>% 
  {lapply(., filter, donor_anno %in% .[[2]]$donor_anno)} %>% 
  {data.frame(.[[1]], diff = .[[2]]$value - .[[1]]$value)}
## `summarise()` has grouped output by 'visit', 'sample', 'anno', 'donor', 'sex'.
## You can override using the `.groups` argument.
plot.dat %>% 
  ggplot(aes(anno, diff)) +
  geom_boxplot(aes(fill = sex)) +
  geom_hline(yintercept = 0, color = "black") +
  geom_point(aes(fill = sex), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean normalized pseudobulk difference\nin top gene marker expression", title = "LAM top markers") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c("darkred", "blue3")) +
  stat_compare_means(mapping = aes(fill = sex), method = "wilcox", label = "p.signif")
## Warning in stat_compare_means(mapping = aes(fill = sex), method = "wilcox", :
## Ignoring unknown aesthetics: fill

plot.dat %>% 
  mutate(sex_visit_anno = paste(sex_visit, anno, sep = "_")) %$% 
  split(diff, sex_visit_anno) %>% 
  lapply(t.test, alternative = "less")
## $`Female visit 1_ATM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -1.0351, df = 6, p-value = 0.1703
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##       -Inf 0.5643315
## sample estimates:
##  mean of x 
## -0.6432132 
## 
## 
## $`Female visit 1_Early LAM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -2.7224, df = 6, p-value = 0.01727
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##       -Inf -5.540808
## sample estimates:
## mean of x 
## -19.35798 
## 
## 
## $`Female visit 1_LAM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -1.0456, df = 6, p-value = 0.168
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##      -Inf 27.21876
## sample estimates:
## mean of x 
##  -31.7068 
## 
## 
## $`Female visit 1_mono/mac`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -2.7853, df = 6, p-value = 0.01589
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##       -Inf -3.298376
## sample estimates:
## mean of x 
## -10.90978 
## 
## 
## $`Male visit 1_ATM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -0.99723, df = 6, p-value = 0.1786
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##      -Inf 2.241065
## sample estimates:
## mean of x 
## -2.362556 
## 
## 
## $`Male visit 1_Early LAM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -2.5991, df = 4, p-value = 0.03005
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##       -Inf -3.660135
## sample estimates:
## mean of x 
##  -20.3602 
## 
## 
## $`Male visit 1_LAM`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -1.9463, df = 4, p-value = 0.06174
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##      -Inf 7.922337
## sample estimates:
## mean of x 
## -83.11198 
## 
## 
## $`Male visit 1_mono/mac`
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -3.814, df = 6, p-value = 0.004411
## alternative hypothesis: true mean is less than 0
## 95 percent confidence interval:
##       -Inf -9.223787
## sample estimates:
## mean of x 
## -18.80438

Figure 4

Load data

con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")

Figure 4A

con$plotGraph(groups = anno.aspc, 
              plot.na = FALSE, 
              size = 0.2,
              alpha = 1, 
              embedding = "largeVis",
              show.labels = TRUE, 
              font.size = 5) + 
  labs(x = "largeVis1", y = "largeVis2") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)])
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 4B

cm.merged <- con$getJointCountMatrix()

c("DPP4", "SEMA3C", "FBN1", # DPP4
  "CXCL14", "CFD", "C3", # CXCL14
  "PPARG", "ACACB", "SEMA3A", # PPARG
  "EPHA3", "MEOX2", "IGFBP7" # EPHA3
) %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.aspc %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")), 
                  gene.order = ., 
                  cols = c("white","green4"))

Figure 4C

Here, we’re plotting proportions of total cells, so we load the Conos object and the annotation for all cells first.

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3", 
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.aspc)] %>% 
  {factor(c(., anno.aspc))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = FALSE, 
                            filter.empty.cell.types = FALSE)
plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.aspc)) %>% 
  mutate(sex = rep(meta$sex, anno.aspc %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         variable = factor(variable, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T)

stat.test[1, "p.adj.signif"] <- "."
stat.test[15, "p.adj.signif"] <- "."

stat.test %<>% 
  filter(p.adj.signif != "ns", !is.na(p), !var %in% c("ASPC_CXCL14")) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 20, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Figure 4D

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.aspc)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.aspc, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType()
## Preparing matrices for DE
## Estimating DE per cell type
de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
genes <- de %>%
  {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
  lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
  lapply(pull, Gene) %>% 
  {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
  Reduce(c, .) %>% 
  .[match(unique(.), .)]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1

Please note, the order of the clusters is not always reproducible.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=TRUE,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=TRUE)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_km = 3,
                              row_km_repeats = 100,
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
                              row_dend_reorder = TRUE)

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))

Figure 4E

Please note, cluster number may be different.

genes.go <- clusterProfiler::bitr("GO:0097193", 
                                  fromType="GOALL", 
                                  toType="SYMBOL", 
                                  OrgDb='org.Hs.eg.db')$SYMBOL %>% 
  unique()
## 'select()' returned 1:many mapping between keys and columns
genes.cls <- x %>% 
  rownames() %>% 
  .[cls[[1]]] # Change if needed

genes <- intersect(genes.go, genes.cls)
cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
plot.dat <- cm.pseudo %>%
  .[, colnames(.) %in% genes] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ")) %>% 
  group_by(visit, sex, anno, sex_visit, sample) %>%
  summarize(value = mean(value))
## `summarise()` has grouped output by 'visit', 'sex', 'anno', 'sex_visit'. You
## can override using the `.groups` argument.
# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean normalized pseudobulk expression", title = "Apoptosis ontology gene expression") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 250, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure 4G

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>% 
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.comb <- factor(c(anno.adipocytes, anno.aspc))

plot.dat <- c("JUN(+)", "ZEB1(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>%
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         anno = unname(anno.comb)[match(rownames(.),
                                        names(anno.comb) %>% 
                                          strsplit("!!") %>% 
                                          sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno)) %>% 
  reshape2::melt(id.vars = c("visit", "anno", "sample")) %>% 
  mutate(anno_vis = paste0(anno,"_",visit),
         anno_var = paste0(anno,"_",variable)) %>% 
  group_by(visit, anno, variable, sample) %>% 
  summarize(mm = median(value)) %>% 
  ungroup() %>% 
  filter(variable == "JUN(+)",
         !anno == "Adipocytes") %>% 
  mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")),
         visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = meta$sex[match(.$sample, meta$sample)],
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
         anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))
## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.
# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean AUC", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Figure 5

Load data

con <- qread("con_adipocytes.qs", nthreads = 10)
anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")

missing.cells <- setdiff(rownames(con$embedding), 
                         names(anno.adipocytes)) %>% 
  {setNames(rep("", length(.)), .)} %>% 
  factor()

anno.adipocytes.ext <- factor(c(anno.adipocytes, missing.cells))

Figure 5A

con$plotGraph(groups = anno.adipocytes.ext, 
              show.labels = TRUE, 
              embedding = "largeVis") +
  labs(x = "largeVis1", y = "largeVis2") +
  theme(line = element_blank()) + 
  scale_colour_manual(values = c(RColorBrewer::brewer.pal(4, "Reds")[-1], "gray60"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure 5B

cm.merged <- con$getJointCountMatrix()

c("PLXDC2","CBLB","TCF4",
  "GPAM","ACSL1","LPL","DGAT2",
  "PRSS23","THSD7A","AL139317.5") %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.adipocytes, 
                  gene.order = ., 
                  cols = c("white","red3"))

Figure 5C

Preparations

cm.merged <- con$getJointCountMatrix(raw = TRUE) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.adipocytes)]

# Calculate DEGs using Cacoa
con.vis13 <- con$samples %>% 
  .[!grepl("vis2", names(.))] %>% 
  Conos$new()

sample.groups <- con.vis13$samples %>% 
  names() %>% 
  `names<-`(ifelse(grepl("vis1", .), "visit1", "visit3"), .)

cao <- Cacoa$new(con.vis13, 
                 sample.groups, 
                 anno.adipocytes, 
                 ref.level = "visit1", 
                 target.level = "visit3", 
                 n.cores = 10)

cao$estimateDEPerCellType()
## Preparing matrices for DE
## Warning in estimateDEPerCellTypeInner(raw.mats = raw.mats, cell.groups =
## cell.groups, : Excluded 1 sample(s) due to 'min.cell.count'.
## Estimating DE per cell type
de <- cao$test.results$de %>%
  lapply('[[', "res")

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)]

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
genes <- de %>%
  {`names<-`(lapply(names(.), \(nn) mutate(.[[nn]], ct = nn)), names(.))} %>% 
  lapply(filter, padj <= 0.05, abs(log2FoldChange) > 1) %>%
  lapply(pull, Gene) %>% 
  {sapply(names(.), \(nn) .[[nn]] %>% `names<-`(., rep(nn, length(.))))} %>% 
  Reduce(c, .) %>% 
  .[match(unique(.), .)]

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(vis, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[, idx$ord] %>% 
  Matrix::t() %>%
  scale() %>%
  Matrix::t()

# Ordering
spc <- con$getDatasetPerCell()

sepc <- meta$sex[match(spc, meta$sample)] %>% 
  setNames(names(spc))

sex.df <- sepc %>% 
  {data.frame(nn = names(.), sex = unname(.))} %>% 
  mutate(nn = strsplit(nn, "!!") %>% sget(1)) %>% 
  .[match(unique(.$nn), .$nn), ] %>% 
  filter(!grepl("pool", nn))

ord <- data.frame(Visit = colnames(x) %>%
                    setNames(colnames(x)) %>% 
                    strsplit("_|!!|vis") %>%
                    sget(4)) %>% 
  mutate(., 
         donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., 
         Sex = sex.df$sex[match(.$donor, sex.df$nn)],
         ct = rownames(.) %>% strsplit("!!") %>% sget(2)) %>% 
  arrange(ct, Visit, Sex) %>% 
  rownames()

x <- x[, ord]

groups <- colnames(x) %>%
  `names<-`(colnames(x))

# Limit scale
x[x > 2] <- 2
x[x < -1] <- -1

Please note, the order of the clusters is not always reproducible, and the clusters them selves may also change slightly.

# Create top annotation
tannot <- data.frame(Visit = groups[colnames(x)] %>%
                       strsplit("_|!!|vis") %>%
                       sget(4)) %>% 
  mutate(., donor = rownames(.) %>% strsplit("_vis") %>% sget(1)) %>% 
  mutate(., Sex = sex.df$sex[match(.$donor, sex.df$nn)]) %>% 
  select(-donor) %>% 
  {HeatmapAnnotation(df=.,
                     border=TRUE,
                     col=list(Visit = ggsci::pal_jama()(7)[c(1:3)] %>% `names<-`(c(1:3)),
                              Sex = c("darkred","blue3") %>% setNames(c("Female","Male"))),
                     show_legend=TRUE)}

# Create color palette
pal <- colorRampPalette(c('navy','grey95','firebrick'))(1024)
labeled.gene.subset <- NULL

# Plot
set.seed(1337)

ha <- ComplexHeatmap::Heatmap(x, 
                              name='Expression', 
                              col=pal, 
                              cluster_columns=FALSE, 
                              show_row_names=FALSE, 
                              show_column_names=FALSE, 
                              top_annotation=tannot,
                              left_annotation=NULL,
                              border=TRUE,
                              show_column_dend = FALSE, 
                              show_row_dend = FALSE,
                              row_km = 4,
                              row_km_repeats = 200, # Here, this parameter greatly influences cluster composition
                              column_split = groups[colnames(x)] %>% strsplit("!!") %>% sapply('[[', 2) %>% unname() %>% factor(levels = c("CLSTN2_archetype", "DGAT2_archetype", "PRSS23_archetype")),
                              row_dend_reorder = TRUE)

ht = draw(ha); row_order(ht) -> cls

Calculate GO enrichment

go <- cls %>% 
  lapply(\(y) clusterProfiler::enrichGO(rownames(x)[y],
                                        OrgDb = "org.Hs.eg.db", 
                                        keyType = "SYMBOL", 
                                        ont = "BP", 
                                        universe = rownames(cm.merged))) %>% 
  setNames(names(cls))

Figure 5D

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                             groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
gene <- "DECR1"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

gene <- "LIPA"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 30, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

gene <- "PLIN5"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene, fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F)# Wilcoxon

Figure 6

Figure 6A

Calculate diffusion embeddings. Please note, this takes some time. We saved the embeddings in adipocytes_aspc_embeddings.qs.

## Load Conos object and annotation
data = qread("con_adipocytes_faps.qs", nthreads = 10)
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

## Convert raw counts to Seurat
for (name in names(data$samples)) {
  seu <- CreateSeuratObject(CreateAssayObject(counts = t(data$samples[[name]]$misc$rawCounts)))
  seu$orig.ident <- substr(name, 0, regexpr("vis", name)-2)
  seu$visit <- substr(name, regexpr("vis", name), nchar(name))
  if (name == names(data$samples)[1]) {
    final <- seu
  } else {
    final <- merge(final, seu)
  }
}

## Merge annotation
final <- subset(final, cells = names(anno))
final <- NormalizeData(final)
anno.final <- anno[ match(colnames(final), names(anno))]
final$fine <- anno.final
final$coarse <- substr(anno.final, 0, regexpr("_", anno.final)-1)
final$dataset <- paste(final$orig.ident, final$visit, sep="_")
final <- subset(final, cells = names(which(!is.na(final$fine))))
full <- final

embeddings <- list()
for (nfeats in c(25,50,100,250)) { 
  # Recreate original object
  final <- full
  # ASPC
  ASPC <- subset(final, coarse == "ASPC")
  VariableFeatures(ASPC) <- NULL
  obj.list <- SplitObject(ASPC, split.by = "dataset")
  feats_ASPC <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Adipocytes
  Ad <- subset(final, coarse == "Adipocytes")
  VariableFeatures(Ad) <- NULL
  obj.list <- SplitObject(Ad, split.by = "dataset")
  feats_Ad <- suppressWarnings(SelectIntegrationFeatures(obj.list, nfeatures = nfeats, verbose = FALSE))
  # Combine
  feats <- unique(c(feats_ASPC, feats_Ad))
  ## Embed
  # PCA
  VariableFeatures(final) <- feats
  final <- ScaleData(final)
  final <- RunPCA(final)
  for (useHarmony in c(TRUE, FALSE)) {
    if (useHarmony) {
      final <- suppressWarnings(RunHarmony(final, group.by.vars = "dataset", verbose  = FALSE))
      pcs <- final@reductions$harmony@cell.embeddings
    } else {
      pcs <- final@reductions$pca@cell.embeddings
    }
    for (dims in c(5, 10, 15, 20)) {
      dm <- destiny::DiffusionMap(pcs[,1:dims], verbose = FALSE, n_pcs = NA)
      dm_ev <- dm@eigenvectors
      colnames(dm_ev) <- paste("DC", c(1:20), sep="_")
      embeddings[[(length(embeddings) + 1)]] <- dm_ev
      names(embeddings)[length(embeddings)] <- paste(nfeats, useHarmony, dims, sep="_")
    }
  }
}

Prepare data. We chose to continue with embedding no. 2. The sds_obj object is saved for Figure 6E.

# Load and prepare data
embeddings <- qread("adipocytes_aspc_embeddings.qs")
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

# Calculate slingshot trajectory
anno.sort <- anno[rownames(embeddings[[2]])] %>%
  as.character() %>%
  unname()

sds_obj <- slingshot(embeddings[[2]][, 1:2] %>% 
                       `colnames<-`(c("UMAP1","UMAP2")), 
                     anno.sort, 
                     end.clus = "Adipocytes", 
                     start.clus = "ASPC_DPP4")

qsave(sds_obj, "sds_obj.qs")


sds <- as.SlingshotDataSet(sds_obj)

Final plot

plot.df <- embeddings[[2]][,1:2] %>% 
  as.data.frame() %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  mutate(., annotation = anno[rownames(.)] %>% as.factor()) %>% 
  .[complete.cases(.),]

line.df <- lapply(1:length(sds@curves), \(lineage) {
  sds@curves[[lineage]]$s[,1:2] %>% 
    data.frame() %>% 
    mutate(lineage = lineage %>% as.factor())
}) %>% 
  setNames(sds@curves %>% names()) %>% 
  bind_rows() %>% 
  mutate(lineage = factor(lineage, labels = c("Lineage 1", "Lineage 2")))

ggplot(plot.df, aes(UMAP1, UMAP2, col = annotation)) + 
  geom_point(size = 0.3) +
  geom_path(data = line.df, aes(UMAP1, UMAP2, group = lineage, col = lineage), inherit.aes = F, linewidth = 1) + 
  theme_bw() + 
  theme(legend.position = "right",
        line = element_blank()) + 
  labs(col = "", x = "DC1", y = "DC2") +
  scale_colour_manual(values = c("#E41A1C", RColorBrewer::brewer.pal(5, "Greens")[-1][c(2,1,3,4)], "black", "grey40")) +
  dotSize(3) +
  ylim(c(-0.03, 0.013)) +
  xlim(c(-0.006, 0.0052))
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6B

We need the sds_obj object from Figure 6A.

sds_obj <- qread("sds_obj.qs")

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(pull(., Lineage1), rownames(.))}

plot.df %>% 
  .[names(pseudotime_all), ] %>%
  mutate(pseudotime = pseudotime_all) %>%
  ggplot(aes(UMAP1, UMAP2, col = pseudotime)) +
  geom_point(size = 0.3) +
  geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = F, color = "black", linewidth = 1, alpha = 0.4) +
  theme_bw() +
  theme(line = element_blank()) +
  labs(col = "Pseudotime", x = "DC1", y = "DC2") +
  scale_color_continuous(low = "blue3", high = "orange") +
  ylim(c(-0.005, 0.013)) +
  xlim(c(-0.006, 0.0052))
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6C

We need the sds_obj object from Figure 6A.

sds_obj <- qread("sds_obj.qs")

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(pull(., Lineage1), rownames(.))}

plot.df %>% 
  .[names(pseudotime_all), ] %>%
  mutate(pseudo = pseudotime_all) %>%
  mutate(bin = cut(pseudo, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = TRUE)) %>%
  arrange(bin) %>% 
  mutate(bin = factor(bin, labels = c("Early preadipocytes", "Mid preadipocytes", "Late preadipocytes", "Adipocyte"))) %>% 
  ggplot(aes(UMAP1, UMAP2, col = bin)) +
  geom_point(size = 0.3) +
  geom_path(data = line.df %>% filter(lineage == "Lineage 1"), aes(UMAP1, UMAP2), inherit.aes = FALSE, color = "black", linewidth = 1, alpha = 0.4) +
  theme_bw() +
  theme(line = element_blank()) +
  labs(col = "Binned pseudotime", x = "DC1", y = "DC2") +
  scale_color_manual(values = colorRampPalette(c("pink","darkred"))(4)) +
  ylim(c(-0.005, 0.013)) +
  xlim(c(-0.006, 0.0052)) +
  dotSize(3)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_path()`).

Figure 6D

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>%
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

We need the sds_obj object from Figure 6A.

sds_obj <- qread("sds_obj.qs")

mat.df <- c("ZEB1", "EBF3", "PPARG") %>%
  paste0("(+)") %>%
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>% 
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ] %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         anno = unname(anno)[match(rownames(.),
                                   names(anno) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno))

pseudotime_all <- sds_obj@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  mutate(., 
         cid = rownames(.) %>% 
           strsplit("!!") %>% 
           sapply(\(x) paste0(x[2],"!!",x[3])))

mat.df %<>% .[match(pseudotime_all$cid, rownames(.), nomatch = F), ]

pseudotime_all %<>% .[match(rownames(mat.df), .$cid, nomatch = F), ]

mat.df2 <- mat.df %>% 
  mutate(pseudotime = pseudotime_all$Lineage1) %>%
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T)) %>%
  select(-anno, -pseudotime) %>% 
  reshape2::melt(id.vars = c("visit", "bin", "sample"), variable.name = "regulon") %>%
  group_by(visit, regulon, bin, sample) %>%
  mutate(value = as.numeric(value)) %>% 
  summarize(mm = median(value))
## `summarise()` has grouped output by 'visit', 'regulon', 'bin'. You can override
## using the `.groups` argument.
mat.df2 %>% 
  pull(regulon) %>% 
  unique() %>% 
  as.character() %>% 
  lapply(\(vv) {
    plot.dat <- mat.df2 %>% 
      filter(regulon == vv) %>%
      ungroup() %>%
      mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>% 
      mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
    
    # Kruskal-Wallis
    sex.stat <- plot.dat %$% 
      split(., sex) %>% 
      lapply(\(x) split(x, x$bin)) %>% 
      lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
      lapply(sapply, gtools::stars.pval) %>% 
      {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
      `names<-`(c("Female","Male")) %>% 
      lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
      {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
      bind_rows() %>% 
      mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
      arrange(ct) %>% 
      mutate(sig = gsub(".", "", sig, fixed = T))
    
    # Wilcoxon
    stat.test <- plot.dat %>%
      group_by(bin, sex) %>%
      wilcox_test(mm~sex_visit, paired = F)
    
    stat.test %<>% 
      filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
      add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
      arrange(desc(y.position))
    
    # Plot
    pp <- ggplot(plot.dat, aes(bin, mm)) + 
      geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
      geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
                 color = "black", size = 0.5, alpha = 0.2) +
      theme_bw() +
      labs(x = "", y = "Mean AUC", fill = "", title = vv) +
      theme(line = element_blank(),
            axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
            axis.text = element_text(color = "black")) +
      scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
      geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 2.2, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
      stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
    
    pp
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3)

Figure 6E

Prepare data

We downsampled to 15k nuclei due to calculation times. We provide sds_obj_reduced.qs for slingshot pseudotime estimations for the downsampled dataset.

Here, we calculate for visit 1 and visit 3.

sds_obj <- qread("sds_obj_reduced.qs")

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>% 
  .[!is.na(.)]

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

mat <- con$getJointCountMatrix() %>%
  .[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               visit = con$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(3),
                               donor = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
  {message("Splitting"); split(., variable)}

The calculations take app. 11 h with 14k genes and 10k cells. We’ve added pre-calculated results as pseudotime_res_novis2.qs.

res <- mt %>% 
  sccore::plapply(\(y) {
    x = y %>% 
      filter(!visit == "vis2")
    
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(annova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes

Plot

Load data. We need the sds_obj from Figure 6A. Also, for ease of use we provide slingshot pseudotime W/O visit 2 as pseudotime_novis2.qs.

res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% 
  .[!sapply(., is.null)]

sds_obj <- qread("sds_obj.qs")

pseudotime <- qread("pseudotime_novis2.qs")

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
both <- res %>%
  sapply(\(gene) {
    ann <- gene$annova
    (ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
  })

res.both <- res[both]

p.both <- res.both %>% 
  sapply(\(gene) gene$annova[3, "Pr(>Chisq)"])

tmp <- res.both %>% 
  lget("annova") %>% 
  lapply(dplyr::slice, 2:3) %>% 
  lapply(pull, AIC) %>%
  .[sapply(., \(x) abs(x[2]) > abs(x[1]))] %>% 
  bind_rows() %>% 
  t() %>% 
  as.data.frame() %>% 
  setNames(c("reduced", "full")) %>% 
  mutate(diff = abs(full) - abs(reduced)) %>% 
  mutate(diff.frac = diff / abs(reduced))

res.filter <- res[
  tmp %>% 
    filter(diff.frac >= 0.05) %>% 
    rownames()
]

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)]

visit = con$getDatasetPerCell()[rownames(cm.merged)] %>%
  as.character() %>%
  strsplit("_") %>%
  sget(3)
# Vis3
cm.merged.vis <- cm.merged[visit == "vis3", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth3 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# Vis1
cm.merged.vis <- cm.merged[visit == "vis1", ]
pseudotime.vis <- pseudotime[rownames(cm.merged.vis)]
weights.vis <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.vis), rownames(.)), 1] + 1E-7

scFit <- cm.merged.vis %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.vis, cellWeights = weights.vis, verbose = TRUE)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth1 <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged.vis), tidy = FALSE, n=100)

# Combine smooth
Smooth <- cbind(Smooth3, Smooth1)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Split smooth
Smooth33 <- Smooth[, 1:100]
Smooth11 <- Smooth[, 101:200]

# Vis3
# Seriate the results

Smooth33 <- Smooth33[ seriation::get_order(seriation::seriate(Smooth33, method="PCA_angle")), ]
## Registered S3 methods overwritten by 'registry':
##   method               from 
##   print.registry_field proxy
##   print.registry_entry proxy
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

p3 <- Heatmap(Smooth33, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              row_names_gp = grid::gpar(fontsize = 5)
)

# Vis1
Smooth11 %<>% .[p3@matrix %>% rownames(), ]

p1 <- Heatmap(Smooth11, 
              col = col_fun,
              cluster_columns=F, 
              cluster_rows=F, 
              show_column_names = F, 
              row_names_gp = grid::gpar(fontsize = 5)
)

p3

p1

Figure 7

Figure 7D

For calculation of data, see Liana.ipynb.

dat.raw <- read.delim("/work/02_data/00_other/liana_res.csv",
                      sep = ",",
                      header = T) %>%
  mutate(visit = strsplit(sample, "_") %>%
           sget(3))

Function

lianaCircos <- function(df,
                        top.interactions = 30, 
                        text.size = 1, 
                        pal = RColorBrewer::brewer.pal(9, "Set1"), 
                        cell.types = c("Adipocytes", 
                                       "Myeloid immune cells", 
                                       "ASPCs", 
                                       "EC", 
                                       "SMCs", 
                                       "Lymphoid immune cells", 
                                       "Mast cells", 
                                       "Lymphatic ECs", 
                                       "Pericytes"),
                        big.gap = 5,
                        small.gap = 2,
                        arrow.width = 3,
                        link.ramp.rel = T,
                        link.sort = F,
                        scale = F,
                        arrow.head.width = 0.3,
                        arrow.head.length = 0.3,
                        link.ramp.col = c("navy", "grey", "firebrick")) {
  input_df <- df %>% 
    slice_max(order_by = score, n = top.interactions) %>% 
    mutate(source = paste0(source, " ")) %>% 
    mutate(source_lig = paste0(source, "|", ligand), 
           target_rec = paste0(target, "|", receptor))
  
  if (link.ramp.rel) {
    arr_wd <- rep(arrow.width, nrow(input_df))
  } else {
    arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
  }
  
  # Colors and segments
  anno.col <- setNames(pal, 
                       cell.types) %>% 
    c(., "ASPCs " = unname(.["ASPCs"]))
  
  cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "ASPCs "))]
  
  link_cols <- c()
  
  if (!link.ramp.rel) {
    for (i in input_df$source_lig) {
      link_cols <- c(link_cols, cell_cols[stringr::str_extract(i, 
                                                               "[^|]+")])
    }
  } else {
    input_df %<>% 
      arrange(score)
    
    df.down <- input_df %>% filter(score <= 0)
    link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
    
    df.up <- input_df %>% filter(score > 0)
    link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
    
    link_cols <- c(link_down, link_up)
  }
  
  segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), 
                       paste0(input_df$target, "|", input_df$receptor)))
  
  grp <- stringr::str_extract(segments, "[^|]+") %>% 
    setNames(segments)
  
  # Redo colors
  cell_cols2 <- grp
  for (i in unique(grp)) {
    cell_cols2[cell_cols2 == i] <- cell_cols[i]
  }
  
  # Plot
  input_df %>% 
    select(source_lig, target_rec, score) %>%
    chordDiagram(directional = 1, 
                 group = grp,
                 scale = scale, 
                 diffHeight = 0.005, 
                 direction.type = c("arrows"),
                 link.arr.type = "triangle", 
                 annotationTrack = c(), 
                 preAllocateTracks = list(
                   list(track.height = 0.05),
                   list(track.height = 0.175),
                   list(track.height = 0.05)), 
                 big.gap = big.gap,
                 transparency = 1,
                 link.arr.lwd = arr_wd,
                 link.arr.col = link_cols,
                 link.arr.length = arrow.head.length,
                 link.arr.width = arrow.head.width,
                 small.gap = small.gap
    )
  
  circos.track(track.index = 2, panel.fun = function(x, y) {
    circos.text(CELL_META$xcenter, 
                CELL_META$ylim[1], 
                stringr::str_extract(CELL_META$sector.index, "[^|]+$"), 
                facing = "clockwise", 
                niceFacing = TRUE, 
                adj = c(0, 0.55), 
                cex = 1)
  }, bg.border = NA)
  
  # Split segments
  for (l in segments) {
    highlight.sector(l, track.index = 3, col = cell_cols2[l])
  }
  
  # Add ligand/receptor track
  ## Ligand
  highlight.sector(input_df$source_lig, 
                   track.index = 1, 
                   col = "black", 
                   text = "Ligands", 
                   cex = 1, 
                   text.col = "white", 
                   niceFacing = TRUE)
  ## Receptor
  highlight.sector(input_df$target_rec, 
                   track.index = 1, 
                   col = "white", 
                   text = "Receptors", 
                   cex = 1, 
                   text.col = "black", 
                   border = "black", 
                   niceFacing = TRUE)
  
  # Legends
  minmax <- input_df %>% 
    pull(score) %>% 
    {pmax(abs(min(.)), max(.))} %>% 
    formatC(digits = 1) %>% 
    as.numeric()
  
  col.range = c(-minmax, 0, minmax)
  lgd_links = Legend(at = col.range, 
                     col_fun = colorRamp2(col.range, link.ramp.col), 
                     title_position = "topleft", 
                     title = "Links")
  
  lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)), 
                   title = "Cell type", 
                   type = "points", 
                   legend_gp = gpar(col = "transparent"), 
                   background = cell_cols[unique(c(input_df$source, input_df$target))])
  
  lgd_list_vertical = packLegend(lgd_ct, lgd_links)
  
  draw(lgd_list_vertical, 
       just = c("left", "bottom"), 
       x = unit(5, "mm"), 
       y = unit(5, "mm"))
  
  circos.clear()
}

Plot

dat.plot <- dat.raw %>% 
  dplyr::rename(ligand = ligand_complex,
                receptor = receptor_complex,
                score = lrscore) %>%
  filter(visit %in% c("visit1", "visit3"),
         source == "ASPCs",
         ligand %in% c("COL18A1", "NID1", "JAG1", "FGF2", "VEGFA", "BMP1", "ANGPT1", "MFGE8", "C3", "IGF1", "ANXA1", "ALCAM"),
         receptor %in% c("ITGB5", "ITGAV", "CD46", "NRP1", "BMPR2", "ITGB1", "IGF2R", "INSR", "EGFR")) %>% 
  group_by(visit, ligand, receptor, source, target) %>% 
  summarize(score = mean(score)) %>% 
  ungroup() %>% 
  arrange(visit, source, target, ligand, receptor)
## `summarise()` has grouped output by 'visit', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.
dat.vis1 <- dat.plot %>% 
  filter(visit == "visit1",
         score > 0.8648) %>% 
  mutate(lrst = paste0(ligand, receptor, source, target))

dat.vis3 <- dat.plot %>% 
  mutate(lrst = paste0(ligand, receptor, source, target)) %>% 
  filter(visit == "visit3",
         lrst %in% dat.vis1$lrst) 

dat.rel <- dat.vis3 %>% 
  mutate(score = score - dat.vis1$score)

dat.rel %>% 
  lianaCircos()

Figure 7E

Preparation

# Load Conos object
con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

anno.faps <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.faps))

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno)]

# Use bins instead of anno
spc <- con$getDatasetPerCell()

anno.bin <- qread("sds_obj.qs")@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(.$Lineage1, rownames(.))} %>% 
  {data.frame(pseudotime = unname(.), anno = anno[names(.)], cid = names(.))} %>% 
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T),
         sample = spc[cid]) %>% 
  mutate(sex = meta$sex[match(sample, meta$sample)],
         visit = strsplit(as.character(sample), "_") %>% sget(3)) %>% 
  mutate(anno.final = paste(sample, bin, sex, sep = "!!"))

# Create pseudo CM
anno.final <- anno.bin %>% 
  pull(anno.final) %>% 
  setNames(anno.bin$cid)

cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
genes <- "C3"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(vis = strsplit(id, "_|!!") %>% sget(3),
         bin = strsplit(id, "!!") %>% sget(2),
         sex = strsplit(id, "!!") %>% sget(3)) %>% 
  mutate(ord = order(vis, bin, sex))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>% 
  mutate(bin = strsplit(sample, "!!") %>% 
           sget(2),
         visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = strsplit(sample, "!!") %>% sget(3)) %>% 
  mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$bin)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(bin, sex) %>%
  wilcox_test(value~sex_visit, paired = T)

stat.test %<>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(bin, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "C3") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.5e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F)# Wilcoxon

Figure S1

Load data

con.major <- qread("con_major.qs", nthreads = 10)

cms <- con.major$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(Matrix::t)

spc <- con.major$getDatasetPerCell()

Figure S1A

cms %>% 
  lapply(\(x) diff(x@p)) %>% 
  sapply(median) %>% 
  {data.frame(sample = names(.),
              value = unname(.))} %>% 
  mutate(visit = strsplit(sample, "_") %>% 
           sget(3)) %>% 
  mutate(sample = factor(sample, levels = sample)) %>%
  ggplot(aes(sample, value, fill = visit)) +
  geom_col() +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  labs(x = "", y = "Median genes", fill = "") +
  scale_fill_manual(values = ggsci::pal_jama()(3))

cms %>% 
  lapply(sparseMatrixStats::colSums2) %>% 
  sapply(median) %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>% 
  mutate(visit = strsplit(sample, "_") %>% 
           sget(3)) %>% 
  mutate(sample = factor(sample, levels = sample)) %>%
  ggplot(aes(sample, value, fill = visit)) +
  geom_col() +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  labs(x = "", y = "Median UMIs", fill = "") +
  scale_fill_manual(values = ggsci::pal_jama()(3))

Figure S1B

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Donor") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Figure S1C

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Sex") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S1D

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con.major$plotGraph(groups = varToPlot, 
                    plot.na = F, 
                    size = 0.1,
                    alpha = 0.1, 
                    embedding = "UMAP_refined7",
                    mark.groups = F,
                    show.labels = T,
                    show.legend = T,
                    title = "Visit") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S1E

WATLAS <- read.delim("watlas_labels_l1_adipose_anno_super.tsv")
rownames(WATLAS)=WATLAS$X
WATLAS=WATLAS[2:21]

# Define the order of columns and rows
column_order <- c("Adipocyte", "FAP", "VEC", "LEC", "SMC","Pericyte", "B.cell", "CD4..T.cell", "CD8..T.cell", "ILC", "NK.cell", "Mast",  
                  "Macrophage",  "Monocyte", "DC", "pDC",
                  "Endometrium", "Mesothelial", "Plasmablast", 
                  "Schwann" )

row_order <- c("Adipocytes", "ASPC", "EC", "lEC", "SMCs", "Pericytes", "B-cells", "T-cells", "Mast cells", "Myeloid")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Figure S2

Load data

con <- qread("con_vascular.qs", nthreads = 10)
anno.vascular <- qread("anno_vascular.qs")

spc <- con$getDatasetPerCell()

Figure S2A

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "UMAP2", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Figure S2B

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.1,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S2C

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.3,
              alpha = 0.1, 
              embedding = "UMAP",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "UMAP1", y = "UMAP2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S2D

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define Vascular cells
column_order <- c("SOX5..VEC",  "BTNL9..VEC", "ACKR1..VEC", "LEC", "SMC", "Pericyte")
row_order <- c("arEC", "cEC", "cEC2","vEC", "lEC", "SMCs", "Pericytes")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Figure S3

Figure S3A

Load data

## Plot Individual genes from RNA-seq data
Bulk_norm_counts <- read.delim("Bulk_norm_counts.txt", h=T)
colData <- read.delim("Meta_Data_WL_Select.txt", h=T, dec = ",")
gene_of_interest <- "TNF"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

gene_of_interest <- "CCL3"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

gene_of_interest <- "CCL5"

# Subset norm_counts to get counts for the gene of interest
gene_counts <- Bulk_norm_counts[Bulk_norm_counts$hgnc_symbol == gene_of_interest, ]
rownames(gene_counts)=gene_counts$hgnc_symbol
gene_counts=gene_counts[4:96]

# Reshape gene_counts into long format and merge with colData
gene_counts_long <- gene_counts %>%
  tibble::rownames_to_column(var = "gene") %>%  # Create a 'gene' column from row names
  tidyr::pivot_longer(cols = -gene, names_to = "recordid", values_to = "count")

gene_counts_long <- gene_counts_long %>%
  left_join(colData, by = "recordid")  # Assuming recordid matches between gene_counts and colData

# Define a helper function to map p-values to stars
pval_to_stars <- function(p) {
  if (p < 0.0001) {
    return("****")
  } else if (p < 0.001) {
    return("***")
  } else if (p < 0.01) {
    return("**")
  } else if (p < 0.05) {
    return("*")
  } else {
    return("ns")  # Not significant
  }
}

# Perform a Kruskal-Wallis test separately for each gender
kw_res_F <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "F"))
kw_res_M <- kruskal.test(count ~ visit, data = gene_counts_long %>% filter(sex == "M"))

# Extract Kruskal-Wallis p-values
kw_pval_F <- kw_res_F$p.value
kw_pval_M <- kw_res_M$p.value

# Perform paired Wilcoxon tests for each gender with Holm's correction
paired_wilcox <- function(data, sex_group) {
  data_sex <- data %>% filter(sex == sex_group)
  pairwise_tests <- list(
    visit1_vs_visit2 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit1_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_1") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value,
    visit2_vs_visit3 = wilcox.test(
      x = data_sex %>% filter(visit == "visit_2") %>% pull(count),
      y = data_sex %>% filter(visit == "visit_3") %>% pull(count),
      paired = TRUE
    )$p.value
  )
  corrected_pvals <- p.adjust(unlist(pairwise_tests), method = "holm")
  return(corrected_pvals)
}

wilcox_pvals_F <- paired_wilcox(gene_counts_long, "F")
wilcox_pvals_M <- paired_wilcox(gene_counts_long, "M")

# Map the Wilcoxon p-values to stars
wilcox_stars_F <- sapply(wilcox_pvals_F, pval_to_stars)
wilcox_stars_M <- sapply(wilcox_pvals_M, pval_to_stars)

# Create the boxplot with significance annotations
plot <- ggplot(gene_counts_long, aes(x = interaction(visit, sex), y = count, fill = visit)) +
  geom_boxplot(outlier.shape = NA) +  # Remove outliers
  geom_jitter(color = "grey", size = 1, width = 0.2, alpha = 0.5) +  # Add individual points
  labs(title = "Gene Expression Across Visits and Genders", x = "Visit and Sex", y = "Gene Count") +
  theme_minimal() +
  theme(plot.title = element_text(size = 10)) +
  geom_signif(
    comparisons = list(
      c("visit_1.F", "visit_2.F"), 
      c("visit_1.F", "visit_3.F"), 
      c("visit_2.F", "visit_3.F"),
      c("visit_1.M", "visit_2.M"), 
      c("visit_1.M", "visit_3.M"), 
      c("visit_2.M", "visit_3.M")
    ),
    annotations = c(
      wilcox_stars_F["visit1_vs_visit2"],
      wilcox_stars_F["visit1_vs_visit3"],
      wilcox_stars_F["visit2_vs_visit3"],
      wilcox_stars_M["visit1_vs_visit2"],
      wilcox_stars_M["visit1_vs_visit3"],
      wilcox_stars_M["visit2_vs_visit3"]
    ),
    textsize = 4,
    map_signif_level = FALSE
  )

# Add Kruskal-Wallis p-values as text
plot <- plot + 
  annotate("text", x = 1, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Female p-value:", format(kw_pval_F, digits = 3)), 
           hjust = 0, size = 3) +
  annotate("text", x = 4, y = max(gene_counts_long$count) * 1.1, 
           label = paste("KW Male p-value:", format(kw_pval_M, digits = 3)), 
           hjust = 0, size = 3)

# Display the plot
print(plot)
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded
## Warning in data.frame(x = c(min(comp[1], comp[2]), min(comp[1], comp[2]), : row
## names were found from a short variable and have been discarded

Figure S3B-G

Load data

con <- qread("con_lymphoid.qs", nthreads = 10)
anno.lymphoid <- qread("anno_lymphoid.qs")

spc <- con$getDatasetPerCell()

Figure S3B

con$plotGraph(groups = anno.lymphoid, 
              plot.na = F, 
              size = 0.8,
              alpha = 1, 
              embedding = "largeVis", 
              shuffle.colors = T, 
              show.labels = T, 
              font.size = 5,
              show.legend = T,
              mark.groups = T) + 
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  scale_colour_manual(values = RColorBrewer::brewer.pal(9, "YlOrBr")[-c(1,2)][c(1,4,5,2,6)]) +
  dotSize(3)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S3C

pdf("/work/02_data/00_prints/S3A.pdf", width = 7, height = 4)
varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>%
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)
dev.off()
## png 
##   2

Figure S3D

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var)) %>% 
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S3E

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor() %>% 
  .[names(.) %in% names(anno.lymphoid)]

con$plotGraph(groups = varToPlot, 
              plot.na = F, 
              size = 0.5,
              alpha = 0.8, 
              embedding = "largeVis",
              mark.groups = F,
              show.labels = T,
              show.legend = T,
              title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S3F

cm.merged <- con$getJointCountMatrix() %>% 
  .[rownames(.) %in% names(anno.lymphoid), ]

c("FOXP3", "CTLA4",
  "IL7R", "CD4",
  "CD40LG", "CD8A",
  "GZMK", "GZMH",
  "GNLY", "KLRD1",
  "KLRF1", "NCAM1"
) %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.lymphoid %>% 
                    factor(levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")), 
                  gene.order = ., 
                  cols = c("white","yellow4"))

Figure S3G

con.major <- qread("con_major.qs", nthreads = 10)
anno.major <- qread("anno_major.qs")

# Use Cacoa to calculate proportions
cao <- Cacoa$new(data.object = con.major, 
                 cell.groups = anno.major, 
                 ref.level = "vis2", 
                 target.level = "vis3",
                 sample.groups = con.major$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sapply('[[', 3) %>% 
                   gsub(1, 2, .) %>% 
                   `names<-`(con.major$samples %>% 
                               names()),
                 sample.groups.palette = ggsci::pal_jama()(7))

cao$sample.groups <- con.major$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sapply('[[', 3) %>% 
  `names<-`(con.major$samples %>% 
              names()) %>% 
  gsub("vis", "Visit ", .)

anno.comb <- anno.major[!names(anno.major) %in% names(anno.lymphoid)] %>% 
  {factor(c(., anno.lymphoid))}

p <- cao$plotCellGroupSizes(cell.groups = anno.comb,
                            show.significance = F, 
                       filter.empty.cell.types = F)
plot.dat <- p$data %>% 
  filter(variable %in% levels(anno.lymphoid)) %>% 
  mutate(sex = rep(meta$sex, anno.lymphoid %>% levels() %>% length()),
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, group, sep = " ") %>% gsub("Vis", "vis", .),
         variable = factor(variable, levels = c("Treg", "CD4 T cells", "CD8 T cells", "NK-like T cells", "NK")))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$variable)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$group)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$variable))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(var, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "var", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(variable, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "% cells per sample", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1)) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 5, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 1, hide.ns = F) # Wilcoxon

Figure S3H-J,L

Load data

con <- qread("con_myeloid.qs", nthreads = 10)
anno.immune <- qread("anno_immune.qs")

spc <- con$getDatasetPerCell()

Figure S3H

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Figure S3I

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.1,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S3J

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S3K

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define MACS
column_order <- c("LYVE1..Macrophage",  "CYP27A1..Macrophage", "LPL..Macrophage")
row_order <- c("ATM", "mono/mac", "Early LAM", "LAM")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.5, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Figure S3L

Preparations

Load data for S3H-J first

cts <- c("Early LAM", "LAM", "mono/mac", "ATM")

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.immune[anno.immune %in% cts] %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
  
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names()) %>%
  gsub("Foam-like Mac", "Early LAM", .)

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% c("HLA-B")] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ATM", "mono/mac", "Early LAM", "LAM")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = "HLA-B") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 400, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) # Wilcoxon

Figure S4

Figure S4A-C

Load data

con <- qread("con_aspc.qs", nthreads = 10)
anno.aspc <- qread("anno_aspc.qs")

spc <- con$getDatasetPerCell()

Figure S4A

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Figure S4B

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.1,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S4C

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S4D

FAP_sub <- read.delim("fap.tsv")
rownames(FAP_sub)=FAP_sub$anno
FAP_sub=FAP_sub[2:5]

# Define ASPCs 
column_order <- c("FAP2","FAP1", "FAP4","FAP3")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
FAP_ordered <- FAP_sub[, rev(column_order)] # Reorder columns
FAP_ordered <- FAP_ordered[row_order, ] # Reorder rows

corrplot(t(t(FAP_ordered)), method="color", order = , col.lim=c(-1, 1.09),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Figure S4E

WATLAS <- read.delim("watlas_labels_l2_adipose_anno.tsv")
rownames(WATLAS)=WATLAS$X

# Define ASPCs 
column_order <- c("DPP4..FAP",  "CXCL14..FAP", "PPARG..FAP", "ICAM1..FAP")
row_order <- c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")
WATLAS_ordered <- WATLAS[, rev(column_order)] # Reorder columns
WATLAS_ordered <- WATLAS_ordered[row_order, ] # Reorder rows

corrplot(t(WATLAS_ordered), method="color", order = , col.lim=c(0.4, 1),
         col = rev(COL2("RdBu",200)),
         is.corr = F, 
         tl.col = "black",
         tl.pos = "lower",
         outline = "#C0C0C0",
         addgrid.col= NA)

Figure S4F-G

Prepare data

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.aspc %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
  
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure S4F

gene = "HSP90B1"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 300, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure S4G

gene = "JUN"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 600, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "FOS"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 2e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "FOSB"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = T) %>% 
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 1.6e3, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure S4H

See Scenic.ipynb for mat object calculation.

mat <- fastMatMR::fmm_to_mat("aspc_adi_auc_mat.mtx") %>% 
  `dimnames<-`(list(
    read.table("aspc_adi_auc_mat.rownames", header = T)[, 1],
    read.table("aspc_adi_auc_mat.colnames", header = T)[, 1]
  ))

con.major <- qread("con_major.qs", nthreads = 10)
spc <- con.major$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))
anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.comb <- factor(c(anno.adipocytes, anno.aspc))

plot.dat.tmp <- c("FOS(+)", "FOSB(+)") %>% # ZEB1 is just a random pick to make it easier to manipulate data, omitted later
  {mat[, colnames(mat) %in% .]} %>% 
  as.data.frame() %>%
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc) %>% 
                                     strsplit("!!") %>% 
                                     sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         anno = unname(anno.comb)[match(rownames(.),
                                        names(anno.comb) %>% 
                                          strsplit("!!") %>% 
                                          sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ],
         sample = unname(spc)[match(rownames(.), 
                                    names(spc) %>% 
                                      strsplit("!!") %>% 
                                      sapply(\(x) paste0(x[2],"!!",x[3]))
         )
         ]
  ) %>% 
  filter(!is.na(anno)) %>% 
  reshape2::melt(id.vars = c("visit", "anno", "sample")) %>% 
  mutate(anno_vis = paste0(anno,"_",visit),
         anno_var = paste0(anno,"_",variable)) %>% 
  group_by(visit, anno, variable, sample) %>% 
  summarize(mm = median(value)) %>% 
  ungroup() %>% 
  filter(#variable == "JUN(+)",
         !anno == "Adipocytes") %>% 
  mutate(anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3", "Adipocytes")),
         visit = factor(visit, levels = c("vis1", "vis2", "vis3"), labels = c("Visit 1", "Visit 2", "Visit 3")),
         sex = meta$sex[match(.$sample, meta$sample)],
         variable = factor(variable)) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .),
         anno = factor(anno, levels = c("ASPC_DPP4", "ASPC_CXCL14", "ASPC_PPARG", "ASPC_EPHA3")))
## `summarise()` has grouped output by 'visit', 'anno', 'variable'. You can
## override using the `.groups` argument.
plot.dat <- plot.dat.tmp %>% 
  filter(variable == "FOS(+)")

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean AUC", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

plot.dat <- plot.dat.tmp %>% 
  filter(variable == "FOSB(+)")

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(ct = factor(ct, levels = unique(plot.dat$anno))) %>% 
  arrange(ct) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(mm~sex_visit, paired = TRUE) %>%
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
  arrange(desc(y.position))

# Plot
ggplot(plot.dat, aes(anno, mm)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
             color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Mean AUC", fill = "") +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 6, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 2, hide.ns = F) # Wilcoxon

Figure S5

Load data

con <- qread("con_adipocytes.qs", nthreads = 10)

spc <- con$getDatasetPerCell()

Figure S5A

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_vis") %>% 
  sget(1) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Donor") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3)

Figure S5B

var <- meta$sex

varToPlot <- grepl.replace(spc %>% 
                             as.character(), 
                           meta$sample, 
                           var) %>% 
  setNames(names(spc)) %>% 
  factor(labels = unique(var))

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Sex") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = c("firebrick", "navy"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S5C

varToPlot <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc)) %>% 
  factor()

con$plotGraph(groups = varToPlot, 
                   plot.na = F, 
                   size = 0.3,
                   alpha = 0.1, 
                   embedding = "largeVis",
                   mark.groups = F,
                   show.labels = T,
                   show.legend = T,
                   title = "Visit") +
  labs(x = "largeVis1", y = "largeVis2", col = "") + 
  theme(line = element_blank()) +
  dotSize(3) +
  scale_color_manual(values = ggsci::pal_jama()(3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S5D

Please note, the clustering algorithm holds some randomness, clusters will not always be the same.

con$findCommunities(name = "leiden1", resolution = 0.5)
con$findCommunities(name = "leiden2", resolution = 0.5)
con$findCommunities(name = "leiden3", resolution = 0.5)
con$findCommunities(name = "leiden4", resolution = 0.5)

seq(4) %>% 
  lapply(\(x) {
    con$plotGraph(embedding = "largeVis", clustering = paste0("leiden",x), font.size = 6) +
  theme(line = element_blank()) +
  scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 2)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Figure S5E

Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.

con$findCommunities(resolution = 0.5, test.stability = T, name = "l05stab")
## running 100 subsampling iterations ...
## done
## calculating flat stability stats ...
## adjusted Rand ...
## done
## calculating hierarchical stability stats ...
## upper clustering ...
## clusterTree Jaccard ...
## done
plot_grid(plotlist = list(
  con$plotClusterStability("l05stab", "ari") +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_blank()) + 
    geom_boxplot(aes(col = "#E41A1C"), notch = T) +
    labs(x = " \n "),
  con$plotClusterStability("l05stab", "hjc") + 
    theme_bw() +
    theme(line = element_blank()) + 
    scale_color_manual(values = RColorBrewer::brewer.pal(4, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the conos package.
##   Please report the issue at <https://github.com/kharchenkolab/conos/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Figure S5F

Please note, the clustering algorithm holds some randomness, stabilities will not always be the same.

con$findCommunities(resolution = 1, test.stability = T, name = "l05stab")
## running 100 subsampling iterations ...
## done
## calculating flat stability stats ...
## adjusted Rand ...
## done
## calculating hierarchical stability stats ...
## upper clustering ...
## clusterTree Jaccard ...
## done
plot_grid(plotlist = list(
  con$plotClusterStability("l05stab", "ari") +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_blank()) + 
    geom_boxplot(aes(col = "#E41A1C"), notch = T) +
    labs(x = " \n "),
  con$plotClusterStability("l05stab", "hjc") + 
    theme_bw() +
    theme(line = element_blank()) + 
    scale_color_manual(values = RColorBrewer::brewer.pal(8, "Reds")[-1])
), ncol = 2, rel_widths = c(1.3,3))

Figure S5G

cm.merged <- con$getJointCountMatrix()

anno.adipocytes <- qread("anno_adipocytes_archetypes.qs")

c("PRSS23","PDE5A","ADIPOQ","DGAT2", "GPAM", "LPL", "DCN", "VIM", "RORA", "CLSTN2", "CRIM1", "ITGA1") %>% 
  sccore::dotPlot(., 
                  cm.merged, 
                  anno.adipocytes, 
                  gene.order = ., 
                  cols = c("white","red3"))

Figure S5H-I

Preparation

cm.merged <- con$getJointCountMatrix(raw = T)

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[rownames(cm.merged)]
anno.subtype <- anno.adipocytes %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
  
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

cm.pseudo.tmp <- sccore::collapseCellsByType(cm.merged,
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo <- cm.pseudo.tmp %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_|!!") %>% 
                                   sget(3) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo.tmp), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T) %>% 
  t()
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure S5H

gene = "MAP3K5"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

gene = "AKT3"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 100, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure S5I

gene = "ADAMTS10"

plot.dat <- cm.pseudo %>% 
  .[, colnames(.) %in% gene] %>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "sample") %>% 
  mutate(visit = strsplit(sample, "!!|_") %>% 
           sget(3) %>% 
           factor(levels = c("vis1", "vis2", "vis3"), labels = c("visit 1", "visit 2", "visit 3")),
         anno = strsplit(sample, "!!") %>% 
           sget(2) %>% 
           factor(levels = c("DGAT2_archetype", "CLSTN2_archetype", "PRSS23_archetype")),
         donor = strsplit(sample, "!!") %>% 
           sget(1)) %>% 
  reshape2::melt(id.vars = c("sample", "visit", "anno", "donor")) %>%
  mutate(sex = meta$sex[match(donor, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, visit, sep = " "))

# Kruskal-Wallis
sex.stat <- plot.dat %$% 
  split(., sex) %>% 
  lapply(\(x) split(x, x$anno)) %>% 
  lapply(lapply, \(x) kruskal.test(x$value, g = x$visit)$p.value) %>% 
  lapply(sapply, gtools::stars.pval) %>% 
  {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
  `names<-`(c("Female","Male")) %>% 
  lapply(\(x) data.frame(anno = names(x), sig = unname(x))) %>% 
  {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
  bind_rows() %>% 
  mutate(anno = factor(anno, levels = unique(plot.dat$anno))) %>% 
  arrange(anno) %>% 
  mutate(sig = gsub(".", "", sig, fixed = T))

# Wilcoxon
stat.test <- plot.dat %>%
  dplyr::rename(var = variable) %>% # add_xy... already uses "variable", need to rename
  group_by(anno, sex) %>%
  wilcox_test(value~sex_visit, paired = F) %>%
  mutate(., p.adj.signif = apply(., 1, \(x) if(x[10] > 0.05 && x[10] <= 0.1) "." else x[11])) %>% 
  filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
  add_xy_position(x = "anno", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05)

plot.dat %>% 
  ggplot(aes(anno, value)) + 
  geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
  geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1),
        color = "black", size = 0.5, alpha = 0.2) +
  theme_bw() +
  labs(x = "", y = "Normalized pseudobulk expression", title = gene) +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        axis.text = element_text(color = "black")) +
  scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
  geom_text(inherit.aes = F, data = sex.stat, aes(anno, y = 200, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
  stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 3, hide.ns = F) + # Wilcoxon
  guides(fill = "none")

Figure S6

Figure S6A-C

Preparations

We downsampled to 15k nuclei due to calculation times. We provide sds_obj_reduced.qs for slingshot pseudotime estimations for the downsampled dataset.

Here, we calculate for visit 1 and visit 3.

sds_obj <- qread("sds_obj_reduced.qs")

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1] %>% 
  .[!is.na(.)]

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)

mat <- con$getJointCountMatrix() %>%
  .[match(names(pseudotime), rownames(.)), colSums(.) > 2e2]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               visit = con$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(3),
                               donor = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "visit", "donor"))} %$%
  {message("Splitting"); split(., variable)}

The calculations take app. 11 h with 14k genes and 10k cells. We’ve added pre-calculated results as pseudotime_res_novis2.qs.

res <- mt %>% 
  sccore::plapply(\(y) {
    x = y %>% 
      filter(!visit == "vis2")
    
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(visit)), random = ~(1 | donor), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | donor), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | donor), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(annova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 5, mc.preschedule = T, mc.cleanup = T, progress = F) # 10 cores fails, 5 is max tested that finishes

Figure S6A,B

Load data. For ease, we also provide pseudotime W/O visit 2 nuclei. We need the sds_obj from Figure 6A. Also, for ease of use we provide slingshot pseudotime W/O visit 2 as pseudotime_novis2.qs.

res <- qread("pseudotime_res_novis2.qs", nthreads = 10) %>% 
  .[!sapply(., is.null)]

sds_obj <- qread("sds_obj.qs")

pseudotime <- qread("pseudotime_novis2.qs")

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
both <- res %>%
  sapply(\(gene) {
    ann <- gene$annova
    (ann[2, "Pr(>Chisq)"] <= 0.05 & ann[3, "Pr(>Chisq)"] <= 0.05)
  })

res.both <- res[both]

rsq.both <- res.both %>% 
  sapply(\(gene) gene$r.sq[1]) %>% # Ranking by r2 for pseudotime
  sort(decreasing = T)

res.filter <- res.both %>% 
  .[rsq.both %>% .[. > 0.2] %>% names() %>% gsub(".full", "", .)]

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)

Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]

# Create heatmap
gene.subset <- which(rownames(Smooth) %in% c("PDGFRB", "DCN", "FBN1", "LPL", "ADIPOQ"))
labels <- rownames(Smooth)[gene.subset]
lannot <- rowAnnotation(link = anno_mark(at = gene.subset,
                                         labels = labels,
                                         labels_gp = grid::gpar(fontsize = 7)))

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
             col = col_fun,
             cluster_columns=F, 
             cluster_rows=F, 
             show_column_names = F, 
             show_row_names = F,
             left_annotation = lannot)

repressed.start <- 1
tempinduced.start = which(rownames(Smooth) == "SMOC2")
tempinduced.end <- which(rownames(Smooth) == "APOD")
repressed.end <- tempinduced.start - 1
induced.start <- tempinduced.end + 1
induced.end <- nrow(Smooth)

go.list <- Smooth %>% 
  rownames() %>% 
  {
    list(
      repressed = .[repressed.start : repressed.end],
      tempinduced = .[tempinduced.start : tempinduced.end],
      induced = .[induced.start : induced.end]
    )
  } %>% 
  lapply(\(x) clusterProfiler::enrichGO(x, 
                                OrgDb = org.Hs.eg.db::org.Hs.eg.db, 
                                keyType = "SYMBOL", 
                                ont = "BP", 
                                universe = rownames(con$samples[[1]])))

go.list %>% 
  lapply(getElement, "result") %>% 
  lapply(head, 10)
## $repressed
##                    ID
## GO:0030198 GO:0030198
## GO:0043062 GO:0043062
## GO:0045229 GO:0045229
## GO:0031589 GO:0031589
## GO:0071560 GO:0071560
## GO:0071559 GO:0071559
## GO:0061448 GO:0061448
## GO:0085029 GO:0085029
## GO:0071230 GO:0071230
## GO:0043200 GO:0043200
##                                                              Description
## GO:0030198                             extracellular matrix organization
## GO:0043062                          extracellular structure organization
## GO:0045229                 external encapsulating structure organization
## GO:0031589                                       cell-substrate adhesion
## GO:0071560 cellular response to transforming growth factor beta stimulus
## GO:0071559                   response to transforming growth factor beta
## GO:0061448                                 connective tissue development
## GO:0085029                                 extracellular matrix assembly
## GO:0071230                      cellular response to amino acid stimulus
## GO:0043200                                        response to amino acid
##            GeneRatio   BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0030198    22/128 332/18888 0.06626506       9.778238 13.329206 6.782925e-16
## GO:0043062    22/128 333/18888 0.06606607       9.748874 13.304969 7.223228e-16
## GO:0045229    22/128 334/18888 0.06586826       9.719686 13.280834 7.690482e-16
## GO:0031589    19/128 359/18888 0.05292479       7.809714 10.760196 4.137077e-12
## GO:0071560    14/128 302/18888 0.04635762       6.840646  8.451648 1.793284e-08
## GO:0071559    14/128 309/18888 0.04530744       6.685680  8.323778 2.391837e-08
## GO:0061448    13/128 288/18888 0.04513889       6.660807  7.996284 8.139110e-08
## GO:0085029     7/128  55/18888 0.12727273      18.780682 10.907873 8.617905e-08
## GO:0071230     8/128  86/18888 0.09302326      13.726744  9.770900 1.216058e-07
## GO:0043200     9/128 124/18888 0.07258065      10.710181  8.960774 1.671552e-07
##                p.adjust       qvalue
## GO:0030198 5.847330e-13 4.800480e-13
## GO:0043062 5.847330e-13 4.800480e-13
## GO:0045229 5.847330e-13 4.800480e-13
## GO:0031589 2.359168e-09 1.936805e-09
## GO:0071560 8.180962e-06 6.716321e-06
## GO:0071559 9.092967e-06 7.465049e-06
## GO:0061448 2.457180e-05 2.017270e-05
## GO:0085029 2.457180e-05 2.017270e-05
## GO:0071230 3.082030e-05 2.530253e-05
## GO:0043200 3.812810e-05 3.130201e-05
##                                                                                                                                              geneID
## GO:0030198 ADAMTS5/CST3/DPP4/MFAP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0043062 ADAMTS5/CST3/DPP4/MFAP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0045229 ADAMTS5/CST3/DPP4/MFAP4/MMP2/COL1A1/ADAMTSL1/COL3A1/SH3PXD2B/TNXB/COL1A2/RECK/ANTXR1/COL14A1/APP/CCDC80/HMCN1/LUM/ELN/FBLN1/PDGFRA/FBLN5
## GO:0031589                       ACTG1/S100A10/CCN2/AXL/ITGA11/CD63/COL1A1/CD34/ITGBL1/COL3A1/PXN/TNXB/FBLN2/CARMIL1/ANTXR1/CD44/CCDC80/FBLN1/FBLN5
## GO:0071560                                                        CLEC3B/SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
## GO:0071559                                                        CLEC3B/SMURF2/FBN1/HTRA3/LTBP4/CILP/COL1A1/COL3A1/PXN/COL1A2/ZEB1/PDGFD/IGF1R/FYN
## GO:0061448                                                              TIMP1/CRIP1/EBF2/CCN2/ECM1/COL1A1/CD34/COL3A1/SH3PXD2B/ZEB1/CD44/PDGFD/GLI3
## GO:0085029                                                                                                MFAP4/COL3A1/TNXB/COL1A2/ANTXR1/ELN/FBLN5
## GO:0071230                                                                                          MMP2/COL1A1/COL3A1/COL1A2/ZEB1/PDGFD/PDGFRA/FYN
## GO:0043200                                                                                    MMP2/COL1A1/COL3A1/COL1A2/ZEB1/PDGFD/IGF1R/PDGFRA/FYN
##            Count
## GO:0030198    22
## GO:0043062    22
## GO:0045229    22
## GO:0031589    19
## GO:0071560    14
## GO:0071559    14
## GO:0061448    13
## GO:0085029     7
## GO:0071230     8
## GO:0043200     9
## 
## $tempinduced
##                    ID
## GO:0006956 GO:0006956
## GO:0051895 GO:0051895
## GO:0150118 GO:0150118
## GO:0006957 GO:0006957
## GO:0006959 GO:0006959
## GO:1901889 GO:1901889
## GO:0001953 GO:0001953
## GO:0032970 GO:0032970
## GO:0006958 GO:0006958
## GO:0010518 GO:0010518
##                                                            Description
## GO:0006956                                       complement activation
## GO:0051895              negative regulation of focal adhesion assembly
## GO:0150118 negative regulation of cell-substrate junction organization
## GO:0006957                  complement activation, alternative pathway
## GO:0006959                                     humoral immune response
## GO:1901889               negative regulation of cell junction assembly
## GO:0001953                 negative regulation of cell-matrix adhesion
## GO:0032970                  regulation of actin filament-based process
## GO:0006958                    complement activation, classical pathway
## GO:0010518               positive regulation of phospholipase activity
##            GeneRatio   BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0006956      5/34  67/18888 0.07462687      41.457419 14.087484 1.239669e-07
## GO:0051895      3/34  16/18888 0.18750000     104.161765 17.530285 2.936896e-06
## GO:0150118      3/34  16/18888 0.18750000     104.161765 17.530285 2.936896e-06
## GO:0006957      3/34  18/18888 0.16666667      92.588235 16.508560 4.268952e-06
## GO:0006959      6/34 258/18888 0.02325581      12.919289  8.186023 5.979691e-06
## GO:1901889      3/34  32/18888 0.09375000      52.080882 12.280832 2.550522e-05
## GO:0001953      3/34  40/18888 0.07500000      41.664706 10.932870 5.030718e-05
## GO:0032970      6/34 385/18888 0.01558442       8.657601  6.446459 5.722657e-05
## GO:0006958      3/34  42/18888 0.07142857      39.680672 10.656836 5.831047e-05
## GO:0010518      3/34  43/18888 0.06976744      38.757866 10.525986 6.260671e-05
##                p.adjust       qvalue                              geneID Count
## GO:0006956 0.0001363636 0.0000947368                  C1R/CFD/C1S/C3/CFH     5
## GO:0051895 0.0010768618 0.0007481355                   ARHGAP6/DLC1/APOD     3
## GO:0150118 0.0010768618 0.0007481355                   ARHGAP6/DLC1/APOD     3
## GO:0006957 0.0011739617 0.0008155944                          CFD/C3/CFH     3
## GO:0006959 0.0013155320 0.0009139485           C1R/CFD/C1S/C3/CFH/CXCL14     6
## GO:1901889 0.0046759579 0.0032485602                   ARHGAP6/DLC1/APOD     3
## GO:0001953 0.0068867380 0.0047844706                   ARHGAP6/DLC1/APOD     3
## GO:0032970 0.0068867380 0.0047844706 GSN/PDGFRB/ANK2/ARHGAP6/DLC1/CXCL12     6
## GO:0006958 0.0068867380 0.0047844706                          C1R/C1S/C3     3
## GO:0010518 0.0068867380 0.0047844706                PDGFRB/AGTR1/ARHGAP6     3
## 
## $induced
##                    ID                                Description GeneRatio
## GO:0019915 GO:0019915                              lipid storage      8/56
## GO:0010889 GO:0010889 regulation of sequestering of triglyceride      5/56
## GO:0030730 GO:0030730               sequestering of triglyceride      5/56
## GO:0010883 GO:0010883                regulation of lipid storage      6/56
## GO:1905954 GO:1905954  positive regulation of lipid localization      7/56
## GO:1905952 GO:1905952           regulation of lipid localization      8/56
## GO:0051235 GO:0051235                    maintenance of location      9/56
## GO:0016042 GO:0016042                    lipid catabolic process      9/56
## GO:0034308 GO:0034308          primary alcohol metabolic process      6/56
## GO:0006641 GO:0006641             triglyceride metabolic process      6/56
##              BgRatio RichFactor FoldEnrichment    zScore       pvalue
## GO:0019915  94/18888 0.08510638      28.705167 14.683932 3.244150e-10
## GO:0010889  15/18888 0.33333333     112.428571 23.542306 5.601370e-10
## GO:0030730  18/18888 0.27777778      93.690476 21.454218 1.587389e-09
## GO:0010883  54/18888 0.11111111      37.476190 14.637346 1.193253e-08
## GO:1905954 112/18888 0.06250000      21.080357 11.622677 3.892818e-08
## GO:1905952 186/18888 0.04301075      14.506912 10.094756 7.215063e-08
## GO:0051235 337/18888 0.02670623       9.007630  8.088399 5.965806e-07
## GO:0016042 340/18888 0.02647059       8.928151  8.044335 6.423639e-07
## GO:0034308 106/18888 0.05660377      19.091644 10.185617 7.003288e-07
## GO:0006641 108/18888 0.05555556      18.738095 10.080878 7.820144e-07
##                p.adjust       qvalue
## GO:0019915 3.587677e-07 2.691606e-07
## GO:0010889 3.587677e-07 2.691606e-07
## GO:0030730 6.778150e-07 5.085214e-07
## GO:0010883 3.821391e-06 2.866946e-06
## GO:1905954 9.973401e-06 7.482407e-06
## GO:1905952 1.540416e-05 1.155676e-05
## GO:0051235 9.968014e-05 7.478365e-05
## GO:0016042 9.968014e-05 7.478365e-05
## GO:0034308 9.968014e-05 7.478365e-05
## GO:0006641 1.001760e-04 7.515570e-05
##                                                           geneID Count
## GO:0019915       PPARG/ACACB/NRIP1/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C     8
## GO:0010889                          PPARG/CIDEA/PNPLA2/PLIN5/LPL     5
## GO:0030730                          PPARG/CIDEA/PNPLA2/PLIN5/LPL     5
## GO:0010883                    PPARG/ACACB/CIDEA/PNPLA2/PLIN5/LPL     6
## GO:1905954              PPARG/ACACB/CIDEA/ACSL1/PLIN5/ADIPOQ/LPL     7
## GO:1905952       PPARG/ACACB/CIDEA/ACSL1/PNPLA2/PLIN5/ADIPOQ/LPL     8
## GO:0051235   PPARG/ACACB/NRIP1/DMD/CIDEA/PNPLA2/PLIN5/LPL/ACVR1C     9
## GO:0016042 ACACB/CIDEA/PNPLA2/PLIN5/ADIPOQ/LIPE/LPL/PDE3B/PLAAT3     9
## GO:0034308                   ADH1B/ALDH2/PNPLA2/PECR/RETSAT/LIPE     6
## GO:0006641                    ACSL1/PNPLA2/PLIN5/LIPE/LPL/PLAAT3     6

Figure S6C

Figure S6A needs to be calculated first. We obtain list with human transcription factors from public resource.

tfs <- read.table("https://humantfs.ccbr.utoronto.ca/download/v_1.01/TF_names_v_1.01.txt") %>% 
  pull(V1)

tf.filter <- intersect(names(res.filter), tfs)

cm.merged <- con$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% tf.filter] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Omit MALAT1
Smooth %<>% .[!rownames(.) == "MALAT1", ]

col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
             col = col_fun,
             cluster_columns=F, 
             cluster_rows=F, 
             show_column_names = F, 
             row_names_gp = grid::gpar(fontsize = 10)
             )

Figure S6D

We need the sds_obj object from Figure 6A.

con <- qread("con_adipocytes_aspc.qs", nthreads = 10)
spc <- con$getDatasetPerCell()
vpc <- spc %>% 
  as.character() %>% 
  strsplit("_|!!") %>% 
  sget(3) %>% 
  setNames(names(spc))

anno.adipocytes <- qread("anno_major.qs") %>% 
  .[. == "Adipocytes"] %>% 
  factor()
anno.aspc <- qread("anno_aspc.qs")
anno <- factor(c(anno.adipocytes, anno.aspc))

cm.merged <- con$getJointCountMatrix()
mat.df <- c("ZEB1", "EBF2", "PPARG") %>%
  {cm.merged[, colnames(cm.merged) %in% .]} %>% 
  as.matrix() %>% 
  as.data.frame() %>% 
  mutate(., 
         visit = unname(vpc)[match(rownames(.), 
                                   names(vpc)
                                   )
                             ] %>% 
           factor(labels = c("Visit 1", "Visit 2", "Visit 3")),
         anno = unname(anno)[match(rownames(.),
                                   names(anno)
                                   )
                             ],
         sample = unname(spc)[match(rownames(.), 
                                   names(spc)
                                   )
                             ]
         ) %>% 
  filter(!is.na(anno))

pseudotime_all <- qread("sds_obj.qs", nthreads = 10)@assays@data@listData$pseudotime %>% 
  as.data.frame() %>% 
  filter(!is.na(Lineage1)) %>% 
  {setNames(.$Lineage1, rownames(.))}

mat.df %<>% .[match(names(pseudotime_all), rownames(.), nomatch = F), ]

pseudotime_all %<>% .[rownames(mat.df)]

mat.df2 <- mat.df %>% 
  mutate(pseudotime = pseudotime_all) %>% 
  mutate(bin = cut(pseudotime, breaks = c(0, 0.005, 0.01, 0.0165, 0.023))) %>% 
  na.omit() %>% 
  mutate(bin = as.character(bin) %>% 
           strsplit(",") %>% 
           sget(2) %>% 
           gsub("]", "", ., fixed = T)) %>%
  select(-anno, -pseudotime) %>%
  reshape2::melt(id.vars = c("visit", "bin", "sample")) %>% 
  filter(value > 0) %>% 
  group_by(visit, variable, bin, sample) %>% 
  mutate(value = as.numeric(value)) %>% 
  summarize(mm = mean(value)) %>% 
  mutate(sex = meta$sex[match(sample, meta$sample)]) %>% 
  mutate(sex_visit = paste(sex, gsub("Visit", "visit", visit), sep = " "))
## `summarise()` has grouped output by 'visit', 'variable', 'bin'. You can
## override using the `.groups` argument.
mat.df2 %>% 
  pull(variable) %>% 
  unique() %>% 
  as.character() %>% 
  lapply(\(vv) {
    plot.dat <- mat.df2 %>% 
      filter(variable == vv) %>%
    ungroup() %>%
      mutate(sex = meta$sex[match(.$sample, meta$sample)]) %>% 
      mutate(sex_visit = paste(sex, visit, sep = " ") %>% gsub("Visit", "visit", .))
    
    # Kruskal-Wallis
    sex.stat <- plot.dat %$% 
      split(., sex) %>% 
      lapply(\(x) split(x, x$bin)) %>% 
      lapply(lapply, \(x) kruskal.test(x$mm, g = x$visit)$p.value) %>% 
      lapply(sapply, gtools::stars.pval) %>% 
      {lapply(seq(length(.)), \(x) gsub("*", c("$","#")[x], .[[x]], fixed = T))} %>% 
      `names<-`(c("Female","Male")) %>% 
      lapply(\(x) data.frame(ct = names(x), sig = unname(x))) %>% 
      {lapply(names(.), \(nn) mutate(.[[nn]], sex = nn))} %>% 
      bind_rows() %>% 
      mutate(ct = factor(ct, levels = unique(plot.dat$bin))) %>% 
      arrange(ct) %>% 
      mutate(sig = gsub(".", "", sig, fixed = T))
    
    # Wilcoxon
    stat.test <- plot.dat %>%
      group_by(bin, sex) %>%
      wilcox_test(mm~sex_visit, paired = F) %>% 
      filter(p.adj.signif != "ns", !is.na(p)) %>% # Remove those not significant for KW
      add_xy_position(x = "bin", dodge = 0.8, scales = "free_y", fun = "mean_sd", step.increase = 0.05) %>% # Adjust line position
      arrange(desc(y.position))
    
    # Plot
    ggplot(plot.dat, aes(bin, mm)) + 
      geom_boxplot(aes(fill = sex_visit), position = position_dodge(), outlier.shape = NA) +
      geom_point(aes(fill = sex_visit), position = position_jitterdodge(jitter.width = 0.1), 
                 color = "black", size = 0.5, alpha = 0.2) +
      theme_bw() +
      labs(x = "", y = "Mean AUC", fill = "", title = vv) +
      theme(line = element_blank(),
            axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
            axis.text = element_text(color = "black")) +
      scale_fill_manual(values = c(colorRampPalette(c("pink","darkred"))(3), colorRampPalette(c("lightblue","blue3"))(3))) +
      geom_text(inherit.aes = F, data = sex.stat, aes(ct, y = 1.4, label = sig, group = sex), position = position_dodge(width = 0.9)) + # KW
      stat_pvalue_manual(stat.test, label = "p.adj.signif", tip.length = 0.01, bracket.nudge.y = 0.2, hide.ns = F) # Wilcoxon
  }) %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3)

Session info

Time to knit

Sys.time() - tt
## Time difference of 26.77843 mins
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.0/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] corrplot_0.94               sccore_1.0.5               
##  [3] cowplot_1.1.3               circlize_0.4.16            
##  [5] slingshot_2.12.0            TrajectoryUtils_1.12.0     
##  [7] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
##  [9] Biobase_2.64.0              GenomicRanges_1.56.1       
## [11] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [13] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## [15] MatrixGenerics_1.16.0       matrixStats_1.4.1          
## [17] princurve_2.1.6             harmony_1.2.1              
## [19] Rcpp_1.0.13                 Seurat_5.1.0               
## [21] SeuratObject_5.0.2          sp_2.1-4                   
## [23] destiny_3.18.0              ComplexHeatmap_2.20.0      
## [25] ggpubr_0.6.0                rstatix_0.7.2              
## [27] ggplot2_3.5.1               cacoa_0.4.0                
## [29] scHelper_0.0.4              pagoda2_1.0.12             
## [31] magrittr_2.0.3              dplyr_1.1.4                
## [33] qs_0.27.2                   conos_1.5.2                
## [35] igraph_2.0.3                Matrix_1.7-0               
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2         nnet_7.3-19              
##   [3] goftest_1.2-3             Biostrings_2.72.1        
##   [5] vctrs_0.6.5               spatstat.random_3.3-2    
##   [7] RApiSerialize_0.1.4       digest_0.6.37            
##   [9] png_0.1-8                 shape_1.4.6.1            
##  [11] proxy_0.4-27              registry_0.5-1           
##  [13] ggrepel_0.9.6             deldir_2.0-4             
##  [15] parallelly_1.38.0         MASS_7.3-61              
##  [17] reshape2_1.4.4            httpuv_1.6.15            
##  [19] foreach_1.5.2             qvalue_2.36.0            
##  [21] withr_3.0.1               ggfun_0.1.6              
##  [23] xfun_0.47                 survival_3.7-0           
##  [25] memoise_2.0.1             hexbin_1.28.4            
##  [27] gson_0.1.0                clusterProfiler_4.12.6   
##  [29] ggsci_3.2.0               tidytree_0.4.6           
##  [31] zoo_1.8-12                GlobalOptions_0.1.2      
##  [33] gtools_3.9.5              pbapply_1.7-2            
##  [35] R.oo_1.26.0               DEoptimR_1.1-3           
##  [37] Formula_1.2-5             KEGGREST_1.44.1          
##  [39] promises_1.3.0            scatterplot3d_0.3-44     
##  [41] httr_1.4.7                globals_0.16.3           
##  [43] fitdistrplus_1.2-1        stringfish_0.16.0        
##  [45] rstudioapi_0.16.0         UCSC.utils_1.0.0         
##  [47] miniUI_0.1.1.1            generics_0.1.3           
##  [49] DOSE_3.30.5               curl_5.2.3               
##  [51] zlibbioc_1.50.0           ggraph_2.2.1             
##  [53] ca_0.71.1                 polyclip_1.10-7          
##  [55] GenomeInfoDbData_1.2.12   SparseArray_1.4.8        
##  [57] RcppEigen_0.3.4.0.2       xtable_1.8-4             
##  [59] stringr_1.5.1             doParallel_1.0.17        
##  [61] fastMatMR_1.2.5           evaluate_1.0.0           
##  [63] S4Arrays_1.4.1            irlba_2.3.5.1            
##  [65] colorspace_2.1-1          ROCR_1.0-11              
##  [67] reticulate_1.39.0         spatstat.data_3.1-2      
##  [69] lmtest_0.9-40             ggtree_3.12.0            
##  [71] viridis_0.6.5             later_1.3.2              
##  [73] lattice_0.22-6            spatstat.geom_3.3-3      
##  [75] future.apply_1.11.2       robustbase_0.99-4-1      
##  [77] shadowtext_0.1.4          scattermore_1.2          
##  [79] triebeard_0.4.1           RcppAnnoy_0.0.22         
##  [81] xts_0.14.0                class_7.3-22             
##  [83] pillar_1.9.0              nlme_3.1-166             
##  [85] iterators_1.0.14          compiler_4.4.2           
##  [87] RSpectra_0.16-2           stringi_1.8.4            
##  [89] TSP_1.2-4                 tensor_1.5               
##  [91] plyr_1.8.9                drat_0.2.4               
##  [93] crayon_1.5.3              abind_1.4-8              
##  [95] gridGraphics_0.5-1        locfit_1.5-9.10          
##  [97] org.Hs.eg.db_3.19.1       graphlayouts_1.2.0       
##  [99] bit_4.5.0                 pcaMethods_1.96.0        
## [101] fastmatch_1.1-4           tradeSeq_1.18.0          
## [103] codetools_0.2-20          TTR_0.24.4               
## [105] bslib_0.8.0               e1071_1.7-16             
## [107] GetoptLong_1.0.5          ggplot.multistats_1.0.1  
## [109] plotly_4.10.4             mime_0.12                
## [111] splines_4.4.2             fastDummies_1.7.4        
## [113] sparseMatrixStats_1.16.0  brew_1.0-10              
## [115] N2R_1.0.3                 knitr_1.48               
## [117] blob_1.2.4                utf8_1.2.4               
## [119] clue_0.3-65               fs_1.6.4                 
## [121] listenv_0.9.1             DelayedMatrixStats_1.26.0
## [123] ggplotify_0.1.2           ggsignif_0.6.4           
## [125] tibble_3.2.1              statmod_1.5.0            
## [127] tweenr_2.0.3              pkgconfig_2.0.3          
## [129] tools_4.4.2               Rook_1.2                 
## [131] cachem_1.1.0              RSQLite_2.3.7            
## [133] viridisLite_0.4.2         smoother_1.3             
## [135] DBI_1.2.3                 fastmap_1.2.0            
## [137] rmarkdown_2.28            scales_1.3.0             
## [139] pbmcapply_1.5.1           ica_1.0-3                
## [141] broom_1.0.7               sass_0.4.9               
## [143] patchwork_1.3.0           dotCall64_1.1-1          
## [145] carData_3.0-5             RANN_2.6.2               
## [147] farver_2.1.2              scatterpie_0.2.4         
## [149] tidygraph_1.3.1           mgcv_1.9-1               
## [151] yaml_2.3.10               ggthemes_5.1.0           
## [153] cli_3.6.3                 purrr_1.0.2              
## [155] leiden_0.4.3.1            lifecycle_1.0.4          
## [157] uwot_0.2.2                backports_1.5.0          
## [159] BiocParallel_1.38.0       gtable_0.3.5             
## [161] rjson_0.2.23              ggridges_0.5.6           
## [163] progressr_0.14.0          limma_3.60.5             
## [165] parallel_4.4.2            ape_5.8                  
## [167] edgeR_4.2.1               jsonlite_1.8.9           
## [169] seriation_1.5.6           RcppHNSW_0.6.0           
## [171] bit64_4.5.2               Rtsne_0.17               
## [173] yulab.utils_0.1.7         spatstat.utils_3.1-0     
## [175] ranger_0.16.0             urltools_1.7.3           
## [177] RcppParallel_5.1.9        jquerylib_0.1.4          
## [179] highr_0.11                GOSemSim_2.30.2          
## [181] spatstat.univar_3.0-1     R.utils_2.12.3           
## [183] lazyeval_0.2.2            shiny_1.9.1              
## [185] htmltools_0.5.8.1         enrichplot_1.24.4        
## [187] GO.db_3.19.1              sctransform_0.4.1        
## [189] rappdirs_0.3.3            glue_1.8.0               
## [191] spam_2.10-0               httr2_1.0.5              
## [193] XVector_0.44.0            VIM_6.2.2                
## [195] treeio_1.28.0             RMTstat_0.3.1            
## [197] gridExtra_2.3             boot_1.3-31              
## [199] R6_2.5.1                  tidyr_1.3.1              
## [201] DESeq2_1.44.0             vcd_1.4-13               
## [203] labeling_0.4.3            cluster_2.1.6            
## [205] aplot_0.2.3               DelayedArray_0.30.1      
## [207] tidyselect_1.2.1          ggforce_0.4.2            
## [209] dendsort_0.3.4            car_3.1-3                
## [211] AnnotationDbi_1.66.0      future_1.34.0            
## [213] leidenAlg_1.1.3           munsell_0.5.1            
## [215] KernSmooth_2.23-24        laeken_0.5.3             
## [217] data.table_1.16.0         htmlwidgets_1.6.4        
## [219] fgsea_1.30.0              RColorBrewer_1.1-3       
## [221] rlang_1.1.4               spatstat.sparse_3.1-0    
## [223] spatstat.explore_3.3-2    fansi_1.0.6              
## [225] Cairo_1.6-2